## The workspace is deliberately NOT wiped here. `rm(list = ls())` would delete
## the caller's objects whenever this script is sourced, while leaving attached
## packages and options untouched, so it never yields a truly clean session.
## Run the script in a fresh R session instead.

## importamos las librerías
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   1.0.1 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.5.0 
## ✔ readr   2.1.4      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(DT)
## Load the smokers CSV straight from GitHub.
ruta <- "https://raw.githubusercontent.com/lacamposm/Fundamentos_Analitica/main/data/fumadores.csv"
# The file is comma-separated and uses "." as the decimal mark, which is
# exactly what read.csv() assumes, so no `sep`/`dec` overrides are needed.
# stringsAsFactors = TRUE converts every text column into a factor.
df <- read.csv(ruta, stringsAsFactors = TRUE)
# Show the first 10 rows as a DT table.
datatable(head(df, n = 10))
## Exercise 1: summary statistics for every column.
summary(df)
##       edad           sexo          bmi            hijos       fumador  
##  Min.   :18.00   hombre:676   Min.   :15.96   Min.   :0.000   no:1064  
##  1st Qu.:27.00   mujer :662   1st Qu.:26.30   1st Qu.:0.000   si: 274  
##  Median :39.00                Median :30.40   Median :1.000            
##  Mean   :39.21                Mean   :30.66   Mean   :1.095            
##  3rd Qu.:51.00                3rd Qu.:34.69   3rd Qu.:2.000            
##  Max.   :64.00                Max.   :53.13   Max.   :5.000            
##        region        prima      
##  nor_este :324   Min.   : 1122  
##  nor_oeste:325   1st Qu.: 4740  
##  sur_este :364   Median : 9382  
##  sur_oeste:325   Mean   :13270  
##                  3rd Qu.:16640  
##                  Max.   :63770
## Exercise 2: bar chart with the number of records per sex.
p1 <- ggplot(data = df, mapping = aes(x = sexo)) +
  geom_bar() +
  labs(y = "cantidad") +
  # Hide the y tick labels and the tick marks on both axes.
  theme(
    axis.text.y = element_blank(),
    axis.ticks = element_blank()
  )

# Render the ggplot object through plotly.
ggplotly(p1)
## Exercise 3: summary statistics of the `prima` column.
summary(df[["prima"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1122    4740    9382   13270   16640   63770
## Exercise 4: how much higher, in percent, the mean `prima` of smokers is
## compared with non-smokers.

# Mean `prima` per value of `fumador`.
p4 <- df %>%
  group_by(fumador) %>%
  summarize(media = mean(prima))

# Look the two means up by their `fumador` label instead of by row position,
# so the result does not depend on the order of the factor levels, and work
# with plain numbers rather than dividing one-cell tibbles.
media_si <- p4$media[p4$fumador == "si"]
media_no <- p4$media[p4$fumador == "no"]
dif_pct <- ((media_si / media_no) - 1) * 100
dif_pct
## [1] 280.0001
sprintf("la diferencia porcentual es de %f ", dif_pct)
## [1] "la diferencia porcentual es de 280.000146 "
# `resul` ends up holding the figure rounded to 2 decimals, as text.
resul <- as.character(round(dif_pct, 2))
paste(" la diferencia porcentual es de ",resul,"%")
## [1] " la diferencia porcentual es de  280 %"
## Exercise 5: share of smokers / non-smokers within each sex.
p5 <- ggplot(df, aes(x = sexo, fill = fumador)) +
  geom_bar(position = position_fill(reverse = TRUE), color = "black") +
  # position_fill() rescales every bar to a total height of 1, so the y axis
  # is a proportion, not a count; label it accordingly.
  labs(y = "proporción") +
  # Hide the y tick labels and the tick marks on both axes.
  theme(axis.text.y = element_blank(), axis.ticks = element_blank())

# Render the ggplot object through plotly.
ggplotly(p5)
# Exercise 6: quartiles and mean of `prima` for smokers, split by sex.
eje6 <- df %>%
  filter(fumador == "si") %>%
  group_by(sexo) %>%
  summarise(
    percel_25 = quantile(prima, probs = 0.25),
    percel_50 = quantile(prima, probs = 0.50),
    percel_75 = quantile(prima, probs = 0.75),
    media = round(mean(prima), 2)
  )

eje6
## # A tibble: 2 × 5
##   sexo   percel_25 percel_50 percel_75  media
##   <fct>      <dbl>     <dbl>     <dbl>  <dbl>
## 1 hombre    21242.    36085.    41798. 33042.
## 2 mujer     19696.    28950.    40918. 30679
# Keep only the smokers and draw one `prima` density curve per sex.
fuma <- dplyr::filter(df, fumador == "si")
ggplot(data = fuma, mapping = aes(x = prima, color = sexo)) +
  geom_density()

## R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.

When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Print summary statistics for each column of `cars`.
summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

## Including Plots

You can also embed plots, for example:

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.